library("plyr")
library("dplyr")
library("ggplot2")
library(R.utils)
library(gghighlight)
library(ggman)
library(ggtext)
library(patchwork)
library(plotrix)
library(qqman)
library(qvalue)
library(reshape2)
library(tidyr)
library(zoo)
library(infer)
options(dplyr.summarise.inform = FALSE)
library(bigsnpr)
library("wesanderson")
library("directlabels")
library(OutFLANK)
library(adegenet)
library(poppr)
library(vcfR)
library(stringr)
library(matrixStats)
library(purrr)
library(scales)
Nucleotide diversity (often referred to using the symbol π) is the average pairwise difference between all possible pairs of individuals in your sample. It is a very intuitive and simple measure of genetic diversity, and is accurately estimated even with very few samples. A formal definition is here.
We can obtain the nucleotide diversity (π) from our VCF file using vcftools software. In our case we will collect the π value from each 10 kb (10,000 bp) window of the genome.
NB: vcftools is a very flexible tool for analyzing and manipulating VCF files. It can do many other wonderful things. The vcftools manual is available here (https://vcftools.sourceforge.net/man_latest.html).
I believe that an important step is to compare nucleotide diversity between the different treatment groups. The following code presents information for all treatment groups combined and compares it with each individual treatment group.
The following was run on the command line
# Create the ROD_CADO_working directory in the home directory and enter it.
# -p keeps mkdir from failing when the directory already exists.
mkdir -p ROD_CADO_working
cd ROD_CADO_working
# Create the Nucleotide_diversity subdirectory. (This step used to repeat
# "mkdir ROD_CADO_working", which creates a nested copy of the parent instead.)
mkdir -p Nucleotide_diversity
# NOTE(review): the R import step reads the per-treatment results from
# ROD_CADO_working/Nucleotide_diversity, so the commands that follow were
# presumably run from inside it - confirm before relying on this cd.
cd Nucleotide_diversity
# Copy the shared popmap into the working directory
cp /home/Shared_Data/ROD_CADO/analysis/popmap popmap
# Treatment names were then added by hand and the result saved as treat_popmap
# (presumably sample name in column 1, treatment in column 2 - verify).
# Preview the first lines to check the layout.
head treat_popmap
C1_2 con-con C1_4 con-con C1_5 con-con C1_6 con-con C1_7 con-con C1_8 con-con C1_9 con-con C2_10 con-con C2_11 con-con C2_2 con-con
Create treatment-specific files, each containing a single column with the names of all samples in that treatment:
# Write one sample list per treatment: column 1 of every treat_popmap row whose
# column 2 equals the treatment name, saved as <treatment>.txt in the current
# directory.
# The four awk calls used to be chained with "|", but no data flows between
# them (each reads treat_popmap and redirects to its own file). Running them as
# independent commands gives the same files and makes a failure in any one of
# them visible.
for TREAT in con-con str-con con-rod str-rod; do
  awk -v treat="$TREAT" '$2 == treat {print $1}' treat_popmap > "${TREAT}.txt"
done
#bcftools view --threads 20 -S SNP.TRSdp10g1.FIL.vcf | vcftools --vcf - --window-pi 10000 --out ROD.CADO.all.pi
# For con-con
# Step 1: Filter VCF for population subset
# NOTE(review): the other treatments read their sample list from popmap_files/;
# confirm that con-con.txt really lives in the current directory.
vcftools --gzvcf SNP.TRSdp10g1.FIL.vcf.gz --keep con-con.txt --recode --recode-INFO-all --out temp_concon_filtered
# Step 2: bgzip output (writes temp_concon_filtered.recode.vcf.gz)
bgzip temp_concon_filtered.recode.vcf
# Step 3: Calculate windowed pi
# The input name must match the file written in step 2: "concon_filtered" with
# an underscore. The previous "concon.filtered" pointed at a file that is never
# created.
vcftools --gzvcf temp_concon_filtered.recode.vcf.gz --window-pi 10000 --out ROD.CADO.con-con.pi.windowed.pi
# For str-con
# Step 1: keep only the samples listed for str-con and write a recoded VCF
vcftools \
  --gzvcf SNP.TRSdp10g1.FIL.vcf.gz \
  --keep popmap_files/str-con.txt \
  --recode \
  --recode-INFO-all \
  --out temp_strcon_filtered
# Step 2: compress the recoded VCF with bgzip
bgzip temp_strcon_filtered.recode.vcf
# Step 3: calculate pi in 10,000 bp windows from the compressed subset
vcftools \
  --gzvcf temp_strcon_filtered.recode.vcf.gz \
  --window-pi 10000 \
  --out ROD.CADO.str-con.pi.windowed.pi
# For con-rod
# Step 1: Filter VCF for population subset
# --gzvcf is pointed at the compressed SNP.TRSdp10g1.FIL.vcf.gz, the same input
# the con-con and str-con steps and the batch script use (the ".gz" was missing).
vcftools --gzvcf SNP.TRSdp10g1.FIL.vcf.gz --keep popmap_files/con-rod.txt --recode --recode-INFO-all --out temp_conrod_filtered
# Step 2: bgzip output (writes temp_conrod_filtered.recode.vcf.gz)
bgzip temp_conrod_filtered.recode.vcf
# Step 3: Calculate windowed pi in 10,000 bp windows
vcftools --gzvcf temp_conrod_filtered.recode.vcf.gz --window-pi 10000 --out ROD.CADO.con-rod.pi.windowed.pi
# For str-rod
# Step 1: Filter VCF for population subset
# --gzvcf is pointed at the compressed SNP.TRSdp10g1.FIL.vcf.gz, the same input
# the con-con and str-con steps and the batch script use (the ".gz" was missing).
vcftools --gzvcf SNP.TRSdp10g1.FIL.vcf.gz --keep popmap_files/str-rod.txt --recode --recode-INFO-all --out temp_strrod_filtered
# Step 2: bgzip output (writes temp_strrod_filtered.recode.vcf.gz)
bgzip temp_strrod_filtered.recode.vcf
# Step 3: Calculate windowed pi in 10,000 bp windows
vcftools --gzvcf temp_strrod_filtered.recode.vcf.gz --window-pi 10000 --out ROD.CADO.str-rod.pi.windowed.pi
#!/bin/bash
# run_pi_calculations.sh
# For each treatment group: subset the VCF to that group's samples, compress
# the subset, calculate nucleotide diversity (pi) in 10,000 bp windows, then
# delete the intermediate VCF.

# Stop at the first failing command (or unset variable) so pi is never
# calculated from a missing or partial intermediate file.
set -euo pipefail

VCF=SNP.TRSdp10g1.FIL.vcf.gz
POPS=("con-con" "str-con" "con-rod" "str-rod")

for POP in "${POPS[@]}"; do
  echo "Processing $POP..."

  KEEP="popmap_files/${POP}.txt"        # sample list for this treatment
  OUT_PREFIX="temp_${POP//-}"           # hyphen stripped, e.g. temp_concon
  REC_VCF="${OUT_PREFIX}.recode.vcf"    # written by vcftools --recode
  REC_VCFGZ="${REC_VCF}.gz"             # written by bgzip
  # vcftools adds its own ".windowed.pi" to this prefix, so the file on disk
  # ends in ".pi.windowed.pi.windowed.pi" - the name the R import step reads.
  OUTPUT_PI="ROD.CADO.${POP}.pi.windowed.pi"

  # Step 1: Filter and recode
  vcftools --gzvcf "$VCF" \
    --keep "$KEEP" \
    --recode --recode-INFO-all \
    --out "$OUT_PREFIX"

  # Step 2: Compress VCF and remove uncompressed
  # bgzip replaces its input with the .gz file, so a plain "rm" here always
  # failed with "No such file"; -f keeps the cleanup harmless either way.
  bgzip "$REC_VCF"
  rm -f "$REC_VCF"

  # Step 3: Calculate windowed pi
  vcftools --gzvcf "$REC_VCFGZ" \
    --window-pi 10000 \
    --out "$OUTPUT_PI"

  # Step 4: Clean up compressed VCF
  rm "$REC_VCFGZ"

  echo "Finished processing $POP"
  echo "---------------------------"
done
# Mark the batch script as executable
chmod +x run_pi_calculations.sh
# Open a tmux session named pi_calc
tmux new-session -s pi_calc
# Return to that session later
tmux attach -t pi_calc
# Load the windowed nucleotide-diversity (pi) tables: one for all samples
# combined and one per treatment group. Every file is tab-separated with a
# header row.
# The per-treatment file names end in ".windowed.pi.windowed.pi" because the
# vcftools --out prefix used on the command line already ended in
# ".pi.windowed.pi" and vcftools adds its own ".windowed.pi" suffix.
# header = TRUE is spelled out because T is an ordinary variable that can be
# reassigned.
pi.all.dataframe <- read.table(
  "/home/Shared_Data/ROD_CADO/analysis/raw.vcf/ROD.CADO.all.pi.windowed.pi",
  sep = "\t", header = TRUE
)
pi.concon.dataframe <- read.table(
  "/home/jgreen/ROD_CADO_working/Nucleotide_diversity/ROD.CADO.con-con.pi.windowed.pi.windowed.pi",
  sep = "\t", header = TRUE
)
pi.conrod.dataframe <- read.table(
  "/home/jgreen/ROD_CADO_working/Nucleotide_diversity/ROD.CADO.con-rod.pi.windowed.pi.windowed.pi",
  sep = "\t", header = TRUE
)
pi.strcon.dataframe <- read.table(
  "/home/jgreen/ROD_CADO_working/Nucleotide_diversity/ROD.CADO.str-con.pi.windowed.pi.windowed.pi",
  sep = "\t", header = TRUE
)
pi.strrod.dataframe <- read.table(
  "/home/jgreen/ROD_CADO_working/Nucleotide_diversity/ROD.CADO.str-rod.pi.windowed.pi.windowed.pi",
  sep = "\t", header = TRUE
)
# Here is the colour palette that we will use for everything:
col_pal <- c("#0072B2", "#56B4E9", "#E69F00", "#F0E442")
# Treatment levels, in the order they should be factored:
treat_levels <- c("CONCON", "STRCON", "CONROD", "STRROD")
# Factor the TREAT column of `df` with those levels. No data frame named `df`
# is created in the visible part of the script before this point, so the bare
# name resolves to the stats::df() function and `df$TREAT` stops with
# "object of type 'closure' is not subsettable". Only convert when `df` really
# is a data frame that has a TREAT column.
if (exists("df") && is.data.frame(df) && "TREAT" %in% names(df)) {
  df$TREAT <- factor(df$TREAT, levels = treat_levels)
}
#' Relabel RefSeq chromosome accessions as chromosome numbers.
#'
#' Replaces the accessions NC_035780.1 ... NC_035789.1 found in the CHROM
#' column with "1" ... "10" and converts the column to a factor. Values that
#' contain none of the ten accessions are kept unchanged.
#'
#' @param pi_table A data frame with a CHROM column.
#' @return `pi_table` with CHROM relabelled and stored as a factor (levels in
#'   the default `as.factor()` order).
relabel_chrom_accessions <- function(pi_table) {
  accessions <- paste0("NC_03578", 0:9, ".1")
  chrom_numbers <- as.character(1:10)
  chrom <- as.character(pi_table$CHROM)
  for (k in seq_along(accessions)) {
    # fixed = TRUE matches the accession literally; as a regular expression
    # the "." before the version number would match any character.
    chrom <- sub(accessions[k], chrom_numbers[k], chrom, fixed = TRUE)
  }
  pi_table$CHROM <- as.factor(chrom)
  pi_table
}

# Apply the same relabelling to the all-samples table and to each treatment
# table (previously five copies of the same ten-step pipeline).
pi.all.df <- relabel_chrom_accessions(pi.all.dataframe)
pi.concon.df <- relabel_chrom_accessions(pi.concon.dataframe)
pi.conrod.df <- relabel_chrom_accessions(pi.conrod.dataframe)
pi.strcon.df <- relabel_chrom_accessions(pi.strcon.dataframe)
pi.strrod.df <- relabel_chrom_accessions(pi.strrod.dataframe)
# Named vector mapping RefSeq accessions (names) to chromosome numbers (values):
# NC_035780.1 -> "1", ..., NC_035789.1 -> "10"
chrom_map <- setNames(as.character(1:10), paste0("NC_03578", 0:9, ".1"))
# Names of the imported pi tables (as strings)
input_names <- c(
  "pi.all.dataframe",
  "pi.concon.dataframe",
  "pi.conrod.dataframe",
  "pi.strcon.dataframe",
  "pi.strrod.dataframe"
)
# Names under which the relabelled tables are stored, in the same order
output_names <- c(
  "pi.all.df",
  "pi.concon.df",
  "pi.conrod.df",
  "pi.strcon.df",
  "pi.strrod.df"
)
# The two name vectors are paired by position, so they must be the same length
stopifnot(length(input_names) == length(output_names))
# Relabel CHROM in each table and store the result under its output name.
# The working copy is called pi_table rather than df so the loop does not
# overwrite a `df` object used elsewhere in the script.
for (i in seq_along(input_names)) {
  pi_table <- get(input_names[i]) # retrieve the table by name
  chrom <- as.character(pi_table$CHROM)
  for (old in names(chrom_map)) {
    # fixed = TRUE matches the accession literally; as a regular expression
    # the "." before the version number would match any character.
    chrom <- sub(old, chrom_map[[old]], chrom, fixed = TRUE)
  }
  # Convert to factor
  pi_table$CHROM <- as.factor(chrom)
  # Assign to the new name in the current environment
  assign(output_names[i], pi_table)
}
# Column-by-column summary of the all-samples pi table (output pasted below)
summary(pi.all.df)
CHROM BIN_START BIN_END N_VARIANTS PI
5 :7644 Min. : 1 Min. : 10000 Min. : 1.0 Min. :2.480e-06
3 :5256 1st Qu.: 18235001 1st Qu.: 18245000 1st Qu.: 79.0 1st Qu.:1.587e-03
1 :4775 Median : 36560001 Median : 36570000 Median : 390.0 Median :8.018e-03
2 :4506 Mean : 38071477 Mean : 38081476 Mean : 379.1 Mean :7.928e-03
4 :4503 3rd Qu.: 54970001 3rd Qu.: 54980000 3rd Qu.: 601.0 3rd Qu.:1.265e-02
9 :4402 Max. :104140001 Max. :104150000 Max. :1405.0 Max. :3.170e-02
(Other):9893
# Same summary for the all-samples table, computed separately for each CHROM level
by(pi.all.df, pi.all.df$CHROM, summary)
pi.all.df$CHROM: 1
CHROM BIN_START BIN_END N_VARIANTS PI
1 :4775 Min. : 1 Min. : 10000 Min. : 1.0 Min. :2.484e-06
10 : 0 1st Qu.:17395001 1st Qu.:17405000 1st Qu.: 159.0 1st Qu.:3.297e-03
2 : 0 Median :32860001 Median :32870000 Median : 432.0 Median :8.840e-03
3 : 0 Mean :32981320 Mean :32991319 Mean : 411.4 Mean :8.537e-03
4 : 0 3rd Qu.:49655001 3rd Qu.:49665000 3rd Qu.: 616.0 3rd Qu.:1.284e-02
5 : 0 Max. :65650001 Max. :65660000 Max. :1318.0 Max. :2.874e-02
(Other): 0
------------------------------------------------------------------------------------------------------
pi.all.df$CHROM: 10
CHROM BIN_START BIN_END N_VARIANTS PI
10 :1182 Min. : 1 Min. : 10000 Min. : 1.0 Min. :2.480e-06
1 : 0 1st Qu.: 5392501 1st Qu.: 5402500 1st Qu.: 4.0 1st Qu.:7.834e-05
2 : 0 Median :11405001 Median :11415000 Median : 54.0 Median :1.049e-03
3 : 0 Mean :14438910 Mean :14448909 Mean : 238.2 Mean :5.024e-03
4 : 0 3rd Qu.:23822501 3rd Qu.:23832500 3rd Qu.: 435.8 3rd Qu.:9.352e-03
5 : 0 Max. :32640001 Max. :32650000 Max. :1217.0 Max. :3.170e-02
(Other): 0
------------------------------------------------------------------------------------------------------
pi.all.df$CHROM: 2
CHROM BIN_START BIN_END N_VARIANTS PI
2 :4506 Min. : 120001 Min. : 130000 Min. : 1.0 Min. :2.484e-06
1 : 0 1st Qu.:12982501 1st Qu.:12992500 1st Qu.: 258.0 1st Qu.:5.168e-03
10 : 0 Median :27065001 Median :27075000 Median : 480.0 Median :9.915e-03
3 : 0 Mean :28315551 Mean :28325550 Mean : 451.2 Mean :9.495e-03
4 : 0 3rd Qu.:42497501 3rd Qu.:42507500 3rd Qu.: 641.8 3rd Qu.:1.364e-02
5 : 0 Max. :61750001 Max. :61760000 Max. :1255.0 Max. :2.637e-02
(Other): 0
------------------------------------------------------------------------------------------------------
pi.all.df$CHROM: 3
CHROM BIN_START BIN_END N_VARIANTS PI
3 :5256 Min. : 170001 Min. : 180000 Min. : 1.0 Min. :2.484e-06
1 : 0 1st Qu.:24367501 1st Qu.:24377500 1st Qu.: 160.0 1st Qu.:3.282e-03
10 : 0 Median :42575001 Median :42585000 Median : 425.5 Median :8.819e-03
2 : 0 Mean :41219491 Mean :41229490 Mean : 406.1 Mean :8.469e-03
4 : 0 3rd Qu.:58222501 3rd Qu.:58232500 3rd Qu.: 605.0 3rd Qu.:1.265e-02
5 : 0 Max. :77050001 Max. :77060000 Max. :1243.0 Max. :3.054e-02
(Other): 0
------------------------------------------------------------------------------------------------------
pi.all.df$CHROM: 4
CHROM BIN_START BIN_END N_VARIANTS PI
4 :4503 Min. : 1110001 Min. : 1120000 Min. : 1 Min. :2.484e-06
1 : 0 1st Qu.:15335001 1st Qu.:15345000 1st Qu.: 251 1st Qu.:4.919e-03
10 : 0 Median :29110001 Median :29120000 Median : 449 Median :9.126e-03
2 : 0 Mean :29882215 Mean :29892214 Mean : 430 Mean :8.924e-03
3 : 0 3rd Qu.:45285001 3rd Qu.:45295000 3rd Qu.: 605 3rd Qu.:1.268e-02
5 : 0 Max. :58750001 Max. :58760000 Max. :1317 Max. :2.658e-02
(Other): 0
------------------------------------------------------------------------------------------------------
pi.all.df$CHROM: 5
CHROM BIN_START BIN_END N_VARIANTS PI
5 :7644 Min. : 660001 Min. : 670000 Min. : 1.0 Min. :2.484e-06
1 : 0 1st Qu.:25677501 1st Qu.:25687500 1st Qu.: 192.0 1st Qu.:3.927e-03
10 : 0 Median :49065001 Median :49075000 Median : 442.0 Median :9.203e-03
2 : 0 Mean :48514738 Mean :48524737 Mean : 422.8 Mean :8.913e-03
3 : 0 3rd Qu.:70842501 3rd Qu.:70852500 3rd Qu.: 620.0 3rd Qu.:1.317e-02
4 : 0 Max. :98660001 Max. :98670000 Max. :1363.0 Max. :2.826e-02
(Other): 0
------------------------------------------------------------------------------------------------------
pi.all.df$CHROM: 6
CHROM BIN_START BIN_END N_VARIANTS PI
6 :2244 Min. : 150001 Min. : 160000 Min. : 1.0 Min. :2.484e-06
1 : 0 1st Qu.:14317501 1st Qu.:14327500 1st Qu.: 11.0 1st Qu.:2.004e-04
10 : 0 Median :30895001 Median :30905000 Median : 188.5 Median :4.145e-03
2 : 0 Mean :26803045 Mean :26813044 Mean : 277.0 Mean :6.240e-03
3 : 0 3rd Qu.:37502501 3rd Qu.:37512500 3rd Qu.: 502.2 3rd Qu.:1.155e-02
4 : 0 Max. :51240001 Max. :51250000 Max. :1201.0 Max. :2.765e-02
(Other): 0
------------------------------------------------------------------------------------------------------
pi.all.df$CHROM: 7
CHROM BIN_START BIN_END N_VARIANTS PI
7 :2721 Min. : 190001 Min. : 200000 Min. : 1 Min. :2.484e-06
1 : 0 1st Qu.:15630001 1st Qu.:15640000 1st Qu.: 13 1st Qu.:2.532e-04
10 : 0 Median :33750001 Median :33760000 Median : 168 Median :3.510e-03
2 : 0 Mean :31211409 Mean :31221408 Mean : 306 Mean :6.322e-03
3 : 0 3rd Qu.:46220001 3rd Qu.:46230000 3rd Qu.: 585 3rd Qu.:1.192e-02
4 : 0 Max. :57830001 Max. :57840000 Max. :1349 Max. :2.803e-02
(Other): 0
------------------------------------------------------------------------------------------------------
pi.all.df$CHROM: 8
CHROM BIN_START BIN_END N_VARIANTS PI
8 :3746 Min. : 20001 Min. : 30000 Min. : 1.0 Min. :2.484e-06
1 : 0 1st Qu.:22132501 1st Qu.:22142500 1st Qu.: 23.0 1st Qu.:4.535e-04
10 : 0 Median :44100001 Median :44110000 Median : 236.0 Median :4.775e-03
2 : 0 Mean :39928872 Mean :39938871 Mean : 334.1 Mean :6.939e-03
3 : 0 3rd Qu.:57717501 3rd Qu.:57727500 3rd Qu.: 606.0 3rd Qu.:1.254e-02
4 : 0 Max. :75940001 Max. :75950000 Max. :1405.0 Max. :2.913e-02
(Other): 0
------------------------------------------------------------------------------------------------------
pi.all.df$CHROM: 9
CHROM BIN_START BIN_END N_VARIANTS PI
9 :4402 Min. : 10001 Min. : 20000 Min. : 1.0 Min. :2.484e-06
1 : 0 1st Qu.: 30372501 1st Qu.: 30382500 1st Qu.: 10.0 1st Qu.:1.935e-04
10 : 0 Median : 60985001 Median : 60995000 Median : 151.0 Median :2.901e-03
2 : 0 Mean : 54813006 Mean : 54823005 Mean : 283.8 Mean :5.765e-03
3 : 0 3rd Qu.: 76877501 3rd Qu.: 76887500 3rd Qu.: 527.0 3rd Qu.:1.080e-02
4 : 0 Max. :104140001 Max. :104150000 Max. :1372.0 Max. :2.812e-02
(Other): 0
# Correlation (cor() default method) between the N_VARIANTS and PI columns, all samples
cor(pi.all.df$N_VARIANTS, pi.all.df$PI)
[1] 0.9741407
# Column-by-column summary of the con-con pi table (output pasted below)
summary(pi.concon.df)
CHROM BIN_START BIN_END N_VARIANTS PI
5 :7631 Min. : 1 Min. : 10000 Min. : 1.0 Min. :0.000005
3 :5247 1st Qu.: 18245001 1st Qu.: 18255000 1st Qu.: 70.0 1st Qu.:0.001647
1 :4769 Median : 36570001 Median : 36580000 Median : 337.0 Median :0.008080
2 :4497 Mean : 38068576 Mean : 38078575 Mean : 327.2 Mean :0.008000
4 :4497 3rd Qu.: 54965001 3rd Qu.: 54975000 3rd Qu.: 518.0 3rd Qu.:0.012740
9 :4362 Max. :104140001 Max. :104150000 Max. :1231.0 Max. :0.030658
(Other):9824
# Same summary for the con-con table, computed separately for each CHROM level
by(pi.concon.df, pi.concon.df$CHROM, summary)
pi.concon.df$CHROM: 1
CHROM BIN_START BIN_END N_VARIANTS PI
1 :4769 Min. : 1 Min. : 10000 Min. : 1.0 Min. :0.000005
10 : 0 1st Qu.:17390001 1st Qu.:17400000 1st Qu.: 138.0 1st Qu.:0.003378
2 : 0 Median :32870001 Median :32880000 Median : 369.0 Median :0.008848
3 : 0 Mean :32985052 Mean :32995051 Mean : 353.2 Mean :0.008564
4 : 0 3rd Qu.:49650001 3rd Qu.:49660000 3rd Qu.: 529.0 3rd Qu.:0.012829
5 : 0 Max. :65650001 Max. :65660000 Max. :1098.0 Max. :0.029034
(Other): 0
------------------------------------------------------------------------------------------------------
pi.concon.df$CHROM: 10
CHROM BIN_START BIN_END N_VARIANTS PI
10 :1169 Min. : 1 Min. : 10000 Min. : 1.0 Min. :5.000e-06
1 : 0 1st Qu.: 5380001 1st Qu.: 5390000 1st Qu.: 4.0 1st Qu.:8.679e-05
2 : 0 Median :11360001 Median :11370000 Median : 48.0 Median :1.067e-03
3 : 0 Mean :14375852 Mean :14385851 Mean : 205.9 Mean :5.081e-03
4 : 0 3rd Qu.:23610001 3rd Qu.:23620000 3rd Qu.: 376.0 3rd Qu.:9.252e-03
5 : 0 Max. :32640001 Max. :32650000 Max. :1046.0 Max. :2.980e-02
(Other): 0
------------------------------------------------------------------------------------------------------
pi.concon.df$CHROM: 2
CHROM BIN_START BIN_END N_VARIANTS PI
2 :4497 Min. : 120001 Min. : 130000 Min. : 1 Min. :0.000005
1 : 0 1st Qu.:12970001 1st Qu.:12980000 1st Qu.: 220 1st Qu.:0.005125
10 : 0 Median :27030001 Median :27040000 Median : 405 Median :0.009877
3 : 0 Mean :28285614 Mean :28295613 Mean : 384 Mean :0.009485
4 : 0 3rd Qu.:42380001 3rd Qu.:42390000 3rd Qu.: 547 3rd Qu.:0.013546
5 : 0 Max. :61750001 Max. :61760000 Max. :1089 Max. :0.026280
(Other): 0
------------------------------------------------------------------------------------------------------
pi.concon.df$CHROM: 3
CHROM BIN_START BIN_END N_VARIANTS PI
3 :5247 Min. : 170001 Min. : 180000 Min. : 1.0 Min. :0.000005
1 : 0 1st Qu.:24395001 1st Qu.:24405000 1st Qu.: 141.0 1st Qu.:0.003375
10 : 0 Median :42590001 Median :42600000 Median : 372.0 Median :0.008868
2 : 0 Mean :41239465 Mean :41249464 Mean : 354.2 Mean :0.008546
4 : 0 3rd Qu.:58225001 3rd Qu.:58235000 3rd Qu.: 526.0 3rd Qu.:0.012837
5 : 0 Max. :77050001 Max. :77060000 Max. :1084.0 Max. :0.030658
(Other): 0
------------------------------------------------------------------------------------------------------
pi.concon.df$CHROM: 4
CHROM BIN_START BIN_END N_VARIANTS PI
4 :4497 Min. : 1110001 Min. : 1120000 Min. : 1.0 Min. :0.000005
1 : 0 1st Qu.:15300001 1st Qu.:15310000 1st Qu.: 215.0 1st Qu.:0.005021
10 : 0 Median :29100001 Median :29110000 Median : 387.0 Median :0.009218
2 : 0 Mean :29872512 Mean :29882511 Mean : 369.8 Mean :0.009046
3 : 0 3rd Qu.:45280001 3rd Qu.:45290000 3rd Qu.: 521.0 3rd Qu.:0.012866
5 : 0 Max. :58750001 Max. :58760000 Max. :1090.0 Max. :0.026883
(Other): 0
------------------------------------------------------------------------------------------------------
pi.concon.df$CHROM: 5
CHROM BIN_START BIN_END N_VARIANTS PI
5 :7631 Min. : 660001 Min. : 670000 Min. : 1.0 Min. :0.000005
1 : 0 1st Qu.:25675001 1st Qu.:25685000 1st Qu.: 165.0 1st Qu.:0.003988
10 : 0 Median :49040001 Median :49050000 Median : 381.0 Median :0.009261
2 : 0 Mean :48495906 Mean :48505905 Mean : 364.2 Mean :0.008996
3 : 0 3rd Qu.:70760001 3rd Qu.:70770000 3rd Qu.: 532.0 3rd Qu.:0.013272
4 : 0 Max. :98660001 Max. :98670000 Max. :1108.0 Max. :0.028540
(Other): 0
------------------------------------------------------------------------------------------------------
pi.concon.df$CHROM: 6
CHROM BIN_START BIN_END N_VARIANTS PI
6 :2231 Min. : 150001 Min. : 160000 Min. : 1.0 Min. :0.0000050
1 : 0 1st Qu.:14335001 1st Qu.:14345000 1st Qu.: 10.0 1st Qu.:0.0002154
10 : 0 Median :30970001 Median :30980000 Median : 167.0 Median :0.0043364
2 : 0 Mean :26840857 Mean :26850856 Mean : 243.4 Mean :0.0062448
3 : 0 3rd Qu.:37525001 3rd Qu.:37535000 3rd Qu.: 443.0 3rd Qu.:0.0115789
4 : 0 Max. :51240001 Max. :51250000 Max. :1040.0 Max. :0.0271813
(Other): 0
------------------------------------------------------------------------------------------------------
pi.concon.df$CHROM: 7
CHROM BIN_START BIN_END N_VARIANTS PI
7 :2697 Min. : 190001 Min. : 200000 Min. : 1.0 Min. :0.000005
1 : 0 1st Qu.:15660001 1st Qu.:15670000 1st Qu.: 12.0 1st Qu.:0.000271
10 : 0 Median :33800001 Median :33810000 Median : 150.0 Median :0.003542
2 : 0 Mean :31268774 Mean :31278773 Mean : 265.6 Mean :0.006336
3 : 0 3rd Qu.:46250001 3rd Qu.:46260000 3rd Qu.: 506.0 3rd Qu.:0.011867
4 : 0 Max. :57830001 Max. :57840000 Max. :1220.0 Max. :0.028580
(Other): 0
------------------------------------------------------------------------------------------------------
pi.concon.df$CHROM: 8
CHROM BIN_START BIN_END N_VARIANTS PI
8 :3727 Min. : 20001 Min. : 30000 Min. : 1.0 Min. :0.0000050
1 : 0 1st Qu.:22115001 1st Qu.:22125000 1st Qu.: 21.0 1st Qu.:0.0004772
10 : 0 Median :44120001 Median :44130000 Median : 205.0 Median :0.0048753
2 : 0 Mean :39915142 Mean :39925141 Mean : 289.5 Mean :0.0070622
3 : 0 3rd Qu.:57725001 3rd Qu.:57735000 3rd Qu.: 524.0 3rd Qu.:0.0127248
4 : 0 Max. :75930001 Max. :75940000 Max. :1204.0 Max. :0.0291995
(Other): 0
------------------------------------------------------------------------------------------------------
pi.concon.df$CHROM: 9
CHROM BIN_START BIN_END N_VARIANTS PI
9 :4362 Min. : 10001 Min. : 20000 Min. : 1.0 Min. :0.0000050
1 : 0 1st Qu.: 30392501 1st Qu.: 30402500 1st Qu.: 9.0 1st Qu.:0.0002049
10 : 0 Median : 61025001 Median : 61035000 Median : 132.5 Median :0.0029893
2 : 0 Mean : 54824455 Mean : 54834454 Mean : 245.0 Mean :0.0058796
3 : 0 3rd Qu.: 76847501 3rd Qu.: 76857500 3rd Qu.: 453.0 3rd Qu.:0.0110273
4 : 0 Max. :104140001 Max. :104150000 Max. :1231.0 Max. :0.0284097
(Other): 0
# Correlation (cor() default method) between the N_VARIANTS and PI columns, con-con
cor(pi.concon.df$N_VARIANTS, pi.concon.df$PI)
[1] 0.9810872
# Column-by-column summary of the con-rod pi table (output pasted below)
summary(pi.conrod.df)
CHROM BIN_START BIN_END N_VARIANTS PI
5 :7634 Min. : 1 Min. : 10000 Min. : 1.0 Min. :0.000005
3 :5251 1st Qu.: 18240001 1st Qu.: 18250000 1st Qu.: 72.0 1st Qu.:0.001714
1 :4768 Median : 36570001 Median : 36580000 Median : 344.0 Median :0.008580
2 :4502 Mean : 38078649 Mean : 38088648 Mean : 333.2 Mean :0.008443
4 :4497 3rd Qu.: 54980001 3rd Qu.: 54990000 3rd Qu.: 528.0 3rd Qu.:0.013444
9 :4360 Max. :104140001 Max. :104150000 Max. :1240.0 Max. :0.033498
(Other):9826
# Same summary for the con-rod table, computed separately for each CHROM level
by(pi.conrod.df, pi.conrod.df$CHROM, summary)
pi.conrod.df$CHROM: 1
CHROM BIN_START BIN_END N_VARIANTS PI
1 :4768 Min. : 1 Min. : 10000 Min. : 1.0 Min. :0.000005
10 : 0 1st Qu.:17377501 1st Qu.:17387500 1st Qu.: 143.0 1st Qu.:0.003494
2 : 0 Median :32855001 Median :32865000 Median : 377.0 Median :0.009522
3 : 0 Mean :32971295 Mean :32981294 Mean : 360.1 Mean :0.009078
4 : 0 3rd Qu.:49642501 3rd Qu.:49652500 3rd Qu.: 538.0 3rd Qu.:0.013615
5 : 0 Max. :65650001 Max. :65660000 Max. :1139.0 Max. :0.030267
(Other): 0
------------------------------------------------------------------------------------------------------
pi.conrod.df$CHROM: 10
CHROM BIN_START BIN_END N_VARIANTS PI
10 :1169 Min. : 1 Min. : 10000 Min. : 1.0 Min. :0.000005
1 : 0 1st Qu.: 5370001 1st Qu.: 5380000 1st Qu.: 4.0 1st Qu.:0.000085
2 : 0 Median :11370001 Median :11380000 Median : 51.0 Median :0.001161
3 : 0 Mean :14393611 Mean :14403610 Mean : 212.1 Mean :0.005374
4 : 0 3rd Qu.:23840001 3rd Qu.:23850000 3rd Qu.: 389.0 3rd Qu.:0.009952
5 : 0 Max. :32640001 Max. :32650000 Max. :1076.0 Max. :0.033498
(Other): 0
------------------------------------------------------------------------------------------------------
pi.conrod.df$CHROM: 2
CHROM BIN_START BIN_END N_VARIANTS PI
2 :4502 Min. : 120001 Min. : 130000 Min. : 1.0 Min. :0.000005
1 : 0 1st Qu.:12972501 1st Qu.:12982500 1st Qu.: 226.2 1st Qu.:0.005667
10 : 0 Median :27065001 Median :27075000 Median : 422.0 Median :0.010671
3 : 0 Mean :28308853 Mean :28318852 Mean : 396.1 Mean :0.010163
4 : 0 3rd Qu.:42487501 3rd Qu.:42497500 3rd Qu.: 565.0 3rd Qu.:0.014577
5 : 0 Max. :61750001 Max. :61760000 Max. :1089.0 Max. :0.027570
(Other): 0
------------------------------------------------------------------------------------------------------
pi.conrod.df$CHROM: 3
CHROM BIN_START BIN_END N_VARIANTS PI
3 :5251 Min. : 170001 Min. : 180000 Min. : 1.0 Min. :0.000005
1 : 0 1st Qu.:24385001 1st Qu.:24395000 1st Qu.: 142.0 1st Qu.:0.003440
10 : 0 Median :42590001 Median :42600000 Median : 377.0 Median :0.009299
2 : 0 Mean :41233235 Mean :41243234 Mean : 358.6 Mean :0.008941
4 : 0 3rd Qu.:58225001 3rd Qu.:58235000 3rd Qu.: 535.0 3rd Qu.:0.013371
5 : 0 Max. :77050001 Max. :77060000 Max. :1047.0 Max. :0.031353
(Other): 0
------------------------------------------------------------------------------------------------------
pi.conrod.df$CHROM: 4
CHROM BIN_START BIN_END N_VARIANTS PI
4 :4497 Min. : 1110001 Min. : 1120000 Min. : 1.0 Min. :0.000005
1 : 0 1st Qu.:15300001 1st Qu.:15310000 1st Qu.: 219.0 1st Qu.:0.005305
10 : 0 Median :29080001 Median :29090000 Median : 389.0 Median :0.009781
2 : 0 Mean :29857593 Mean :29867592 Mean : 373.9 Mean :0.009496
3 : 0 3rd Qu.:45260001 3rd Qu.:45270000 3rd Qu.: 524.0 3rd Qu.:0.013444
5 : 0 Max. :58750001 Max. :58760000 Max. :1120.0 Max. :0.028189
(Other): 0
------------------------------------------------------------------------------------------------------
pi.conrod.df$CHROM: 5
CHROM BIN_START BIN_END N_VARIANTS PI
5 :7634 Min. : 660001 Min. : 670000 Min. : 1.0 Min. :0.000005
1 : 0 1st Qu.:25692501 1st Qu.:25702500 1st Qu.: 167.2 1st Qu.:0.004194
10 : 0 Median :49065001 Median :49075000 Median : 387.0 Median :0.009881
2 : 0 Mean :48523953 Mean :48533952 Mean : 370.6 Mean :0.009517
3 : 0 3rd Qu.:70847501 3rd Qu.:70857500 3rd Qu.: 542.0 3rd Qu.:0.014052
4 : 0 Max. :98660001 Max. :98670000 Max. :1191.0 Max. :0.029972
(Other): 0
------------------------------------------------------------------------------------------------------
pi.conrod.df$CHROM: 6
CHROM BIN_START BIN_END N_VARIANTS PI
6 :2227 Min. : 150001 Min. : 160000 Min. : 1.0 Min. :0.0000050
1 : 0 1st Qu.:14325001 1st Qu.:14335000 1st Qu.: 10.0 1st Qu.:0.0002295
10 : 0 Median :30950001 Median :30960000 Median : 180.0 Median :0.0046755
2 : 0 Mean :26823885 Mean :26833884 Mean : 252.6 Mean :0.0067466
3 : 0 3rd Qu.:37505001 3rd Qu.:37515000 3rd Qu.: 458.0 3rd Qu.:0.0125443
4 : 0 Max. :51160001 Max. :51170000 Max. :1090.0 Max. :0.0286459
(Other): 0
------------------------------------------------------------------------------------------------------
pi.conrod.df$CHROM: 7
CHROM BIN_START BIN_END N_VARIANTS PI
7 :2704 Min. : 190001 Min. : 200000 Min. : 1.0 Min. :0.000005
1 : 0 1st Qu.:15647501 1st Qu.:15657500 1st Qu.: 12.0 1st Qu.:0.000270
10 : 0 Median :33775001 Median :33785000 Median : 154.0 Median :0.003904
2 : 0 Mean :31243318 Mean :31253317 Mean : 272.3 Mean :0.006807
3 : 0 3rd Qu.:46242501 3rd Qu.:46252500 3rd Qu.: 519.2 3rd Qu.:0.012744
4 : 0 Max. :57830001 Max. :57840000 Max. :1215.0 Max. :0.028933
(Other): 0
------------------------------------------------------------------------------------------------------
pi.conrod.df$CHROM: 8
CHROM BIN_START BIN_END N_VARIANTS PI
8 :3726 Min. : 20001 Min. : 30000 Min. : 1.0 Min. :0.0000050
1 : 0 1st Qu.:22122501 1st Qu.:22132500 1st Qu.: 21.0 1st Qu.:0.0004945
10 : 0 Median :44115001 Median :44125000 Median : 205.0 Median :0.0050893
2 : 0 Mean :39923834 Mean :39933833 Mean : 291.7 Mean :0.0073059
3 : 0 3rd Qu.:57727501 3rd Qu.:57737500 3rd Qu.: 525.0 3rd Qu.:0.0131929
4 : 0 Max. :75930001 Max. :75940000 Max. :1240.0 Max. :0.0300153
(Other): 0
------------------------------------------------------------------------------------------------------
pi.conrod.df$CHROM: 9
CHROM BIN_START BIN_END N_VARIANTS PI
9 :4360 Min. : 10001 Min. : 20000 Min. : 1.0 Min. :0.0000050
1 : 0 1st Qu.: 30620001 1st Qu.: 30630000 1st Qu.: 9.0 1st Qu.:0.0002091
10 : 0 Median : 61205001 Median : 61215000 Median : 135.0 Median :0.0031654
2 : 0 Mean : 54904600 Mean : 54914599 Mean : 247.6 Mean :0.0060833
3 : 0 3rd Qu.: 76882501 3rd Qu.: 76892500 3rd Qu.: 460.0 3rd Qu.:0.0113625
4 : 0 Max. :104140001 Max. :104150000 Max. :1197.0 Max. :0.0291809
(Other): 0
# Correlation (cor() default method) between the N_VARIANTS and PI columns, con-rod
cor(pi.conrod.df$N_VARIANTS, pi.conrod.df$PI)
[1] 0.9816607
# Column-by-column summary of the str-con pi table (output pasted below)
summary(pi.strcon.df)
CHROM BIN_START BIN_END N_VARIANTS PI
5 :7629 Min. : 1 Min. : 10000 Min. : 1.0 Min. :0.000005
3 :5245 1st Qu.: 18220001 1st Qu.: 18230000 1st Qu.: 68.0 1st Qu.:0.001609
1 :4766 Median : 36560001 Median : 36570000 Median : 324.0 Median :0.007816
2 :4496 Mean : 38067987 Mean : 38077986 Mean : 317.2 Mean :0.007773
4 :4493 3rd Qu.: 54980001 3rd Qu.: 54990000 3rd Qu.: 503.0 3rd Qu.:0.012379
9 :4375 Max. :104140001 Max. :104150000 Max. :1203.0 Max. :0.030383
(Other):9837
# Same summary for the str-con table, computed separately for each CHROM level
by(pi.strcon.df, pi.strcon.df$CHROM, summary)
pi.strcon.df$CHROM: 1
CHROM BIN_START BIN_END N_VARIANTS PI
1 :4766 Min. : 1 Min. : 10000 Min. : 1.0 Min. :0.000005
10 : 0 1st Qu.:17392501 1st Qu.:17402500 1st Qu.: 134.0 1st Qu.:0.003263
2 : 0 Median :32875001 Median :32885000 Median : 354.5 Median :0.008639
3 : 0 Mean :32994976 Mean :33004975 Mean : 340.1 Mean :0.008386
4 : 0 3rd Qu.:49677501 3rd Qu.:49687500 3rd Qu.: 511.8 3rd Qu.:0.012604
5 : 0 Max. :65650001 Max. :65660000 Max. :1083.0 Max. :0.028245
(Other): 0
------------------------------------------------------------------------------------------------------
pi.strcon.df$CHROM: 10
CHROM BIN_START BIN_END N_VARIANTS PI
10 :1167 Min. : 1 Min. : 10000 Min. : 1.0 Min. :5.000e-06
1 : 0 1st Qu.: 5375001 1st Qu.: 5385000 1st Qu.: 4.0 1st Qu.:8.699e-05
2 : 0 Median :11350001 Median :11360000 Median : 50.0 Median :1.040e-03
3 : 0 Mean :14379418 Mean :14389417 Mean : 203.9 Mean :4.923e-03
4 : 0 3rd Qu.:23675001 3rd Qu.:23685000 3rd Qu.: 372.0 3rd Qu.:9.060e-03
5 : 0 Max. :32640001 Max. :32650000 Max. :1091.0 Max. :3.038e-02
(Other): 0
------------------------------------------------------------------------------------------------------
pi.strcon.df$CHROM: 2
CHROM BIN_START BIN_END N_VARIANTS PI
2 :4496 Min. : 120001 Min. : 130000 Min. : 1.0 Min. :0.000005
1 : 0 1st Qu.:12977501 1st Qu.:12987500 1st Qu.: 215.0 1st Qu.:0.005052
10 : 0 Median :27045001 Median :27055000 Median : 395.0 Median :0.009629
3 : 0 Mean :28303061 Mean :28313060 Mean : 375.7 Mean :0.009268
4 : 0 3rd Qu.:42482501 3rd Qu.:42492500 3rd Qu.: 535.0 3rd Qu.:0.013337
5 : 0 Max. :61750001 Max. :61760000 Max. :1063.0 Max. :0.026673
(Other): 0
------------------------------------------------------------------------------------------------------
pi.strcon.df$CHROM: 3
CHROM BIN_START BIN_END N_VARIANTS PI
3 :5245 Min. : 170001 Min. : 180000 Min. : 1.0 Min. :0.000005
1 : 0 1st Qu.:24390001 1st Qu.:24400000 1st Qu.: 138.0 1st Qu.:0.003325
10 : 0 Median :42600001 Median :42610000 Median : 358.0 Median :0.008658
2 : 0 Mean :41233854 Mean :41243853 Mean : 344.6 Mean :0.008326
4 : 0 3rd Qu.:58230001 3rd Qu.:58240000 3rd Qu.: 512.0 3rd Qu.:0.012417
5 : 0 Max. :77050001 Max. :77060000 Max. :1108.0 Max. :0.029710
(Other): 0
------------------------------------------------------------------------------------------------------
pi.strcon.df$CHROM: 4
CHROM BIN_START BIN_END N_VARIANTS PI
4 :4493 Min. : 1110001 Min. : 1120000 Min. : 1.0 Min. :0.000005
1 : 0 1st Qu.:15290001 1st Qu.:15300000 1st Qu.: 208.0 1st Qu.:0.004789
10 : 0 Median :29080001 Median :29090000 Median : 373.0 Median :0.008899
2 : 0 Mean :29861717 Mean :29871716 Mean : 359.6 Mean :0.008714
3 : 0 3rd Qu.:45290001 3rd Qu.:45300000 3rd Qu.: 503.0 3rd Qu.:0.012380
5 : 0 Max. :58750001 Max. :58760000 Max. :1021.0 Max. :0.026611
(Other): 0
------------------------------------------------------------------------------------------------------
pi.strcon.df$CHROM: 5
CHROM BIN_START BIN_END N_VARIANTS PI
5 :7629 Min. : 660001 Min. : 670000 Min. : 1.0 Min. :0.000005
1 : 0 1st Qu.:25650001 1st Qu.:25660000 1st Qu.: 161.0 1st Qu.:0.003863
10 : 0 Median :49010001 Median :49020000 Median : 364.0 Median :0.008906
2 : 0 Mean :48465698 Mean :48475697 Mean : 351.7 Mean :0.008700
3 : 0 3rd Qu.:70730001 3rd Qu.:70740000 3rd Qu.: 515.0 3rd Qu.:0.012889
4 : 0 Max. :98660001 Max. :98670000 Max. :1133.0 Max. :0.027765
(Other): 0
------------------------------------------------------------------------------------------------------
pi.strcon.df$CHROM: 6
CHROM BIN_START BIN_END N_VARIANTS PI
6 :2233 Min. : 150001 Min. : 160000 Min. : 1.0 Min. :0.0000050
1 : 0 1st Qu.:14320001 1st Qu.:14330000 1st Qu.: 9.0 1st Qu.:0.0002159
10 : 0 Median :30930001 Median :30940000 Median : 156.0 Median :0.0042449
2 : 0 Mean :26804632 Mean :26814631 Mean : 230.4 Mean :0.0061215
3 : 0 3rd Qu.:37510001 3rd Qu.:37520000 3rd Qu.: 413.0 3rd Qu.:0.0113437
4 : 0 Max. :51240001 Max. :51250000 Max. :1030.0 Max. :0.0279714
(Other): 0
------------------------------------------------------------------------------------------------------
pi.strcon.df$CHROM: 7
CHROM BIN_START BIN_END N_VARIANTS PI
7 :2710 Min. : 190001 Min. : 200000 Min. : 1.0 Min. :0.0000050
1 : 0 1st Qu.:15632501 1st Qu.:15642500 1st Qu.: 11.0 1st Qu.:0.0002554
10 : 0 Median :33755001 Median :33765000 Median : 145.0 Median :0.0034174
2 : 0 Mean :31205141 Mean :31215140 Mean : 254.7 Mean :0.0061798
3 : 0 3rd Qu.:46217501 3rd Qu.:46227500 3rd Qu.: 482.0 3rd Qu.:0.0115770
4 : 0 Max. :57830001 Max. :57840000 Max. :1134.0 Max. :0.0272090
(Other): 0
------------------------------------------------------------------------------------------------------
pi.strcon.df$CHROM: 8
CHROM BIN_START BIN_END N_VARIANTS PI
8 :3727 Min. : 20001 Min. : 30000 Min. : 1.0 Min. :0.0000050
1 : 0 1st Qu.:22135001 1st Qu.:22145000 1st Qu.: 20.0 1st Qu.:0.0004778
10 : 0 Median :44140001 Median :44150000 Median : 195.0 Median :0.0047001
2 : 0 Mean :39954407 Mean :39964406 Mean : 282.7 Mean :0.0068577
3 : 0 3rd Qu.:57755001 3rd Qu.:57765000 3rd Qu.: 510.0 3rd Qu.:0.0123940
4 : 0 Max. :75940001 Max. :75950000 Max. :1203.0 Max. :0.0291044
(Other): 0
------------------------------------------------------------------------------------------------------
pi.strcon.df$CHROM: 9
CHROM BIN_START BIN_END N_VARIANTS PI
9 :4375 Min. : 10001 Min. : 20000 Min. : 1.0 Min. :0.0000050
1 : 0 1st Qu.: 30365001 1st Qu.: 30375000 1st Qu.: 9.0 1st Qu.:0.0002015
10 : 0 Median : 61120001 Median : 61130000 Median : 128.0 Median :0.0028850
2 : 0 Mean : 54841932 Mean : 54851931 Mean : 238.5 Mean :0.0056882
3 : 0 3rd Qu.: 76895001 3rd Qu.: 76905000 3rd Qu.: 441.0 3rd Qu.:0.0106289
4 : 0 Max. :104140001 Max. :104150000 Max. :1150.0 Max. :0.0276833
(Other): 0
# Pearson correlation between per-window variant count and PI (STRCON table).
with(pi.strcon.df, cor(N_VARIANTS, PI))
[1] 0.9797857
# Column-by-column summary of the whole pi.strrod.df table (all chromosomes).
summary(pi.strrod.df)
CHROM BIN_START BIN_END N_VARIANTS PI
5 :7631 Min. : 1 Min. : 10000 Min. : 1.0 Min. :0.000005
3 :5249 1st Qu.: 18240001 1st Qu.: 18250000 1st Qu.: 69.0 1st Qu.:0.001581
1 :4764 Median : 36580001 Median : 36590000 Median : 328.0 Median :0.007806
2 :4495 Mean : 38079492 Mean : 38089491 Mean : 319.4 Mean :0.007733
4 :4495 3rd Qu.: 54990001 3rd Qu.: 55000000 3rd Qu.: 505.0 3rd Qu.:0.012330
9 :4378 Max. :104140001 Max. :104150000 Max. :1204.0 Max. :0.033066
(Other):9829
# Column summaries of pi.strrod.df computed separately for each CHROM level.
by(pi.strrod.df, pi.strrod.df$CHROM, summary)
pi.strrod.df$CHROM: 1
CHROM BIN_START BIN_END N_VARIANTS PI
1 :4764 Min. : 1 Min. : 10000 Min. : 1.0 Min. :0.000005
10 : 0 1st Qu.:17367501 1st Qu.:17377500 1st Qu.: 137.8 1st Qu.:0.003265
2 : 0 Median :32855001 Median :32865000 Median : 366.0 Median :0.008611
3 : 0 Mean :32976376 Mean :32986375 Mean : 348.7 Mean :0.008348
4 : 0 3rd Qu.:49662501 3rd Qu.:49672500 3rd Qu.: 523.2 3rd Qu.:0.012520
5 : 0 Max. :65650001 Max. :65660000 Max. :1204.0 Max. :0.028473
(Other): 0
------------------------------------------------------------------------------------------------------
pi.strrod.df$CHROM: 10
CHROM BIN_START BIN_END N_VARIANTS PI
10 :1167 Min. : 1 Min. : 10000 Min. : 1.0 Min. :5.000e-06
1 : 0 1st Qu.: 5375001 1st Qu.: 5385000 1st Qu.: 4.0 1st Qu.:8.032e-05
2 : 0 Median :11370001 Median :11380000 Median : 49.0 Median :1.076e-03
3 : 0 Mean :14416702 Mean :14426701 Mean : 207.6 Mean :5.007e-03
4 : 0 3rd Qu.:23860001 3rd Qu.:23870000 3rd Qu.: 380.0 3rd Qu.:9.256e-03
5 : 0 Max. :32640001 Max. :32650000 Max. :1092.0 Max. :3.307e-02
(Other): 0
------------------------------------------------------------------------------------------------------
pi.strrod.df$CHROM: 2
CHROM BIN_START BIN_END N_VARIANTS PI
2 :4495 Min. : 120001 Min. : 130000 Min. : 1.0 Min. :0.000005
1 : 0 1st Qu.:12965001 1st Qu.:12975000 1st Qu.: 220.0 1st Qu.:0.005022
10 : 0 Median :27050001 Median :27060000 Median : 410.0 Median :0.009756
3 : 0 Mean :28297690 Mean :28307689 Mean : 385.5 Mean :0.009299
4 : 0 3rd Qu.:42475001 3rd Qu.:42485000 3rd Qu.: 548.0 3rd Qu.:0.013313
5 : 0 Max. :61750001 Max. :61760000 Max. :1016.0 Max. :0.026039
(Other): 0
------------------------------------------------------------------------------------------------------
pi.strrod.df$CHROM: 3
CHROM BIN_START BIN_END N_VARIANTS PI
3 :5249 Min. : 170001 Min. : 180000 Min. : 1.0 Min. :0.000005
1 : 0 1st Qu.:24390001 1st Qu.:24400000 1st Qu.: 137.0 1st Qu.:0.003253
10 : 0 Median :42590001 Median :42600000 Median : 360.0 Median :0.008483
2 : 0 Mean :41238174 Mean :41248173 Mean : 343.6 Mean :0.008230
4 : 0 3rd Qu.:58230001 3rd Qu.:58240000 3rd Qu.: 508.0 3rd Qu.:0.012345
5 : 0 Max. :77050001 Max. :77060000 Max. :1095.0 Max. :0.030999
(Other): 0
------------------------------------------------------------------------------------------------------
pi.strrod.df$CHROM: 4
CHROM BIN_START BIN_END N_VARIANTS PI
4 :4495 Min. : 1110001 Min. : 1120000 Min. : 1.0 Min. :0.000005
1 : 0 1st Qu.:15295001 1st Qu.:15305000 1st Qu.: 205.0 1st Qu.:0.004657
10 : 0 Median :29080001 Median :29090000 Median : 369.0 Median :0.008767
2 : 0 Mean :29858715 Mean :29868714 Mean : 355.4 Mean :0.008627
3 : 0 3rd Qu.:45265001 3rd Qu.:45275000 3rd Qu.: 499.0 3rd Qu.:0.012329
5 : 0 Max. :58750001 Max. :58760000 Max. :1088.0 Max. :0.025404
(Other): 0
------------------------------------------------------------------------------------------------------
pi.strrod.df$CHROM: 5
CHROM BIN_START BIN_END N_VARIANTS PI
5 :7631 Min. : 660001 Min. : 670000 Min. : 1.0 Min. :0.000005
1 : 0 1st Qu.:25655001 1st Qu.:25665000 1st Qu.: 161.0 1st Qu.:0.003800
10 : 0 Median :49040001 Median :49050000 Median : 369.0 Median :0.008900
2 : 0 Mean :48491636 Mean :48501635 Mean : 352.3 Mean :0.008655
3 : 0 3rd Qu.:70785001 3rd Qu.:70795000 3rd Qu.: 517.0 3rd Qu.:0.012793
4 : 0 Max. :98660001 Max. :98670000 Max. :1113.0 Max. :0.027615
(Other): 0
------------------------------------------------------------------------------------------------------
pi.strrod.df$CHROM: 6
CHROM BIN_START BIN_END N_VARIANTS PI
6 :2230 Min. : 150001 Min. : 160000 Min. : 1.0 Min. :0.0000050
1 : 0 1st Qu.:14322501 1st Qu.:14332500 1st Qu.: 10.0 1st Qu.:0.0002097
10 : 0 Median :30955001 Median :30965000 Median : 165.0 Median :0.0041790
2 : 0 Mean :26827521 Mean :26837520 Mean : 233.3 Mean :0.0061332
3 : 0 3rd Qu.:37517501 3rd Qu.:37527500 3rd Qu.: 421.8 3rd Qu.:0.0112801
4 : 0 Max. :51240001 Max. :51250000 Max. :1029.0 Max. :0.0270868
(Other): 0
------------------------------------------------------------------------------------------------------
pi.strrod.df$CHROM: 7
CHROM BIN_START BIN_END N_VARIANTS PI
7 :2704 Min. : 190001 Min. : 200000 Min. : 1.0 Min. :0.0000050
1 : 0 1st Qu.:15657501 1st Qu.:15667500 1st Qu.: 12.0 1st Qu.:0.0002639
10 : 0 Median :33785001 Median :33795000 Median : 146.0 Median :0.0034714
2 : 0 Mean :31246369 Mean :31256368 Mean : 256.8 Mean :0.0061921
3 : 0 3rd Qu.:46232501 3rd Qu.:46242500 3rd Qu.: 485.0 3rd Qu.:0.0115946
4 : 0 Max. :57830001 Max. :57840000 Max. :1136.0 Max. :0.0277927
(Other): 0
------------------------------------------------------------------------------------------------------
pi.strrod.df$CHROM: 8
CHROM BIN_START BIN_END N_VARIANTS PI
8 :3728 Min. : 20001 Min. : 30000 Min. : 1.0 Min. :0.0000050
1 : 0 1st Qu.:22117501 1st Qu.:22127500 1st Qu.: 20.0 1st Qu.:0.0004723
10 : 0 Median :44145001 Median :44155000 Median : 194.5 Median :0.0046746
2 : 0 Mean :39939942 Mean :39949941 Mean : 281.0 Mean :0.0067395
3 : 0 3rd Qu.:57712501 3rd Qu.:57722500 3rd Qu.: 503.2 3rd Qu.:0.0121566
4 : 0 Max. :75940001 Max. :75950000 Max. :1191.0 Max. :0.0288804
(Other): 0
------------------------------------------------------------------------------------------------------
pi.strrod.df$CHROM: 9
CHROM BIN_START BIN_END N_VARIANTS PI
9 :4378 Min. : 10001 Min. : 20000 Min. : 1.0 Min. :0.0000050
1 : 0 1st Qu.: 30562501 1st Qu.: 30572500 1st Qu.: 9.0 1st Qu.:0.0002024
10 : 0 Median : 61135001 Median : 61145000 Median : 131.5 Median :0.0028874
2 : 0 Mean : 54855472 Mean : 54865471 Mean : 241.4 Mean :0.0056743
3 : 0 3rd Qu.: 76877501 3rd Qu.: 76887500 3rd Qu.: 445.0 3rd Qu.:0.0106070
4 : 0 Max. :104140001 Max. :104150000 Max. :1165.0 Max. :0.0281665
(Other): 0
# Pearson correlation between per-window variant count and PI (STRROD table).
with(pi.strrod.df, cor(N_VARIANTS, PI))
[1] 0.9795718
# Fill colour for each treatment group. The names are also used as the levels
# of the TREAT factor so that every treatment keeps the same colour and order.
col_pal <- c(
  "ALL" = "gray70",
  "CONCON" = "#0072B2",
  "STRCON" = "#56B4E9",
  "CONROD" = "#E69F00",
  "STRROD" = "#F0E442"
)

# Names of the windowed-pi data frames (looked up with get()) and the
# treatment label that belongs to each one, in matching order.
df_names <- c("pi.all.df", "pi.concon.df", "pi.strcon.df", "pi.conrod.df", "pi.strrod.df")
df_labels <- c("ALL", "CONCON", "STRCON", "CONROD", "STRROD")

# Numeric chromosome order (1..10) instead of the alphabetical 1, 10, 2, ...
chrom_levels <- as.character(1:10)

# One summary tibble per treatment: mean and standard error of PI and of
# N_VARIANTS for every chromosome.
summary_list <- lapply(seq_along(df_names), function(i) {
  get(df_names[i]) %>%
    mutate(TREAT = factor(df_labels[i], levels = names(col_pal))) %>%
    group_by(CHROM, TREAT) %>%
    summarise(
      mean_PI = mean(PI, na.rm = TRUE),
      # The SE denominator counts only non-missing values so that it matches
      # the observations actually used by sd(..., na.rm = TRUE).
      se_PI = sd(PI, na.rm = TRUE) / sqrt(sum(!is.na(PI))),
      mean_N_VARIANTS = mean(N_VARIANTS, na.rm = TRUE),
      se_N_VARIANTS = sd(N_VARIANTS, na.rm = TRUE) / sqrt(sum(!is.na(N_VARIANTS))),
      .groups = "drop"
    ) %>%
    mutate(CHROM = factor(CHROM, levels = chrom_levels))
})

# Stack the per-treatment summaries into a single table for plotting.
summary_df <- bind_rows(summary_list)
# Dodged bars of mean PI per chromosome, one bar per treatment, with
# mean +/- SE error bars drawn on top.
mean_pi_plot <- ggplot(
  data = summary_df,
  mapping = aes(x = CHROM, y = mean_PI, fill = TREAT)
) +
  geom_col(position = position_dodge(width = 0.8)) +
  geom_errorbar(
    mapping = aes(ymin = mean_PI - se_PI, ymax = mean_PI + se_PI),
    width = 0.2,
    position = position_dodge(width = 0.8)
  ) +
  scale_fill_manual(name = "Treatment", values = col_pal) +
  ggtitle("Mean PI per Chromosome") +
  xlab("Chromosome") +
  ylab("Mean PI") +
  theme_minimal()

# Same layout for the mean number of variants per window.
mean_n_plot <- ggplot(
  data = summary_df,
  mapping = aes(x = CHROM, y = mean_N_VARIANTS, fill = TREAT)
) +
  geom_col(position = position_dodge(width = 0.8)) +
  geom_errorbar(
    mapping = aes(
      ymin = mean_N_VARIANTS - se_N_VARIANTS,
      ymax = mean_N_VARIANTS + se_N_VARIANTS
    ),
    width = 0.2,
    position = position_dodge(width = 0.8)
  ) +
  scale_fill_manual(name = "Treatment", values = col_pal) +
  ggtitle("Mean number of variants per Chromosome") +
  xlab("Chromosome") +
  ylab("Mean # variants") +
  theme_minimal()

# Show both figures, then write them to PNG files in the working directory.
print(mean_pi_plot)
print(mean_n_plot)
ggsave(filename = "mean_pi_plot.png", plot = mean_pi_plot, width = 10, height = 6, dpi = 300)
ggsave(filename = "mean_n_variants_plot.png", plot = mean_n_plot, width = 10, height = 6, dpi = 300)
# One scatter plot per treatment: PI against the number of variants in each
# window, with a linear-model fit line (no confidence band) overlaid.
purrr::walk2(df_names, df_labels, function(table_name, treatment) {
  scatter <- ggplot(
    data = get(table_name),
    mapping = aes(x = N_VARIANTS, y = PI)
  ) +
    geom_point(alpha = 0.4) +
    geom_smooth(method = "lm", se = FALSE, color = "blue") +
    ggtitle(paste("Correlation: PI vs # variants —", treatment)) +
    xlab("N_VARIANTS") +
    ylab("PI") +
    theme_minimal()
  print(scatter)
})
# Correlation between N_VARIANTS and PI for each treatment table, using only
# rows where both values are present.
cor_table <- tibble(
  dataset = df_labels,
  correlation = vapply(
    df_names,
    function(table_name) {
      windows <- get(table_name)
      cor(windows$N_VARIANTS, windows$PI, use = "complete.obs")
    },
    numeric(1),
    USE.NAMES = FALSE
  )
)
print(cor_table)
# Violin + box plot of PI per chromosome for the pi.all.df table; the diamond
# added by stat_summary() marks the mean.
# CHROM is re-levelled to 1..10 for this plot only, so chromosomes appear in
# numeric order rather than the alphabetical 1, 10, 2, ... factor order.
pi.all.df %>%
  mutate(CHROM = factor(CHROM, levels = as.character(1:10))) %>%
  ggplot(aes(x = CHROM, y = PI)) +
  geom_violin(aes(color = CHROM, fill = CHROM)) +
  geom_boxplot(aes(fill = CHROM), width = 0.1, outlier.shape = 23, outlier.color = "black") +
  stat_summary(fun = mean, geom = "point", shape = 23, size = 2) +
  scale_fill_brewer(palette = "Paired") +
  theme_classic()
# Data frames to stack and the facet label used for each of them
df_names <- c("pi.all.df", "pi.concon.df", "pi.conrod.df", "pi.strcon.df", "pi.strrod.df")
df_labels <- c("All", "ConCon", "ConRod", "StrCon", "StrRod")

# Chromosomes in numeric order
chrom_levels <- as.character(1:10)

# Tag every table with its label, re-level CHROM, then stack the tables
combined_df <- bind_rows(lapply(seq_along(df_names), function(k) {
  mutate(
    get(df_names[k]),
    dataset = df_labels[k],
    CHROM = factor(CHROM, levels = chrom_levels)
  )
}))

# Violin + boxplot of PI per chromosome, one panel per dataset
ggplot(data = combined_df, mapping = aes(x = CHROM, y = PI)) +
  geom_violin(mapping = aes(color = CHROM, fill = CHROM), trim = FALSE) +
  geom_boxplot(
    mapping = aes(fill = CHROM),
    width = 0.1,
    outlier.shape = 23,
    outlier.color = "black"
  ) +
  stat_summary(fun = mean, geom = "point", shape = 23, size = 2) +
  scale_fill_brewer(palette = "Paired") +
  ggtitle("PI Distribution by Chromosome (Faceted by Dataset)") +
  xlab("Chromosome") +
  ylab("PI") +
  facet_wrap(~ dataset, ncol = 2) +
  theme_classic() +
  theme(legend.position = "none")
# Distribution of PI in mydf: histogram with 40 suggested breaks, then a
# box plot. `breaks` is spelled out in full instead of relying on partial
# argument matching (`br`).
hist(mydf$PI, breaks = 40)
boxplot(mydf$PI, ylab = "Nuc Diversity")
# PI along each chromosome for the pi.all.df table, one panel per chromosome.
# The points are mapped to `color`, so the palette has to be applied with a
# colour scale; a fill scale is silently ignored here.
# CHROM is re-levelled to 1..10 for this plot only, so the panels appear in
# numeric order.
pi.all.df %>%
  mutate(CHROM = factor(CHROM, levels = as.character(1:10))) %>%
  ggplot(aes(x = BIN_START, y = PI, color = CHROM)) +
  geom_point() +
  scale_color_brewer(palette = "Paired") +
  # Show window start positions in megabases
  scale_x_continuous(labels = label_number(scale = 1e-6, suffix = "M")) +
  facet_wrap(~CHROM) +
  theme_classic()
# Data frames to plot and the label used in each plot title / file name
df_names <- c("pi.all.df", "pi.concon.df", "pi.conrod.df", "pi.strcon.df", "pi.strrod.df")
df_labels <- c("ALL", "CONCON", "CONROD", "STRCON", "STRROD")

# PI range across all tables, so every figure shares the same y-axis
all_pi_values <- unlist(lapply(df_names, function(x) get(x)$PI))
global_ymin <- min(all_pi_values, na.rm = TRUE)
global_ymax <- max(all_pi_values, na.rm = TRUE)

# One faceted scatter plot (PI vs BIN_START, a panel per chromosome) for each
# table; every plot is printed and saved as PI_vs_BIN_START_<label>.png
for (i in seq_along(df_names)) {
  df <- get(df_names[i])
  label <- df_labels[i]

  # Ensure CHROM is a factor ordered from 1 to 10
  df$CHROM <- factor(df$CHROM, levels = as.character(1:10))

  p <- ggplot(df, aes(x = BIN_START, y = PI, color = CHROM)) +
    geom_point() +
    facet_wrap(~CHROM) +
    # Points are mapped to `color`, so the palette needs a colour scale;
    # a fill scale would be ignored
    scale_color_brewer(palette = "Paired") +
    # Human-readable x-axis (megabases)
    scale_x_continuous(labels = label_number(scale = 1e-6, suffix = "M")) +
    # Same y-axis for all plots
    ylim(global_ymin, global_ymax) +
    theme_classic() +
    labs(
      title = paste("PI vs BIN_START -", label),
      x = "BIN_START (millions)",
      y = "PI"
    )

  print(p)
  ggsave(
    filename = paste0("PI_vs_BIN_START_", label, ".png"),
    plot = p, width = 10, height = 6, dpi = 300
  )
}
# Tables to combine and the treatment label attached to each
df_names <- c("pi.all.df", "pi.concon.df", "pi.conrod.df", "pi.strcon.df", "pi.strrod.df")
df_labels <- c("ALL", "CONCON", "CONROD", "STRCON", "STRROD")

# Point colour for each treatment
col_pal <- c(
  ALL = "gray70",
  CONCON = "#0072B2",
  STRCON = "#56B4E9",
  CONROD = "#E69F00",
  STRROD = "#F0E442"
)

# Stack all tables, tagging every row with the treatment it came from
all_data <- purrr::map2(df_names, df_labels, function(table_name, treatment) {
  tagged <- get(table_name)
  tagged$Treatment <- treatment
  tagged
}) %>%
  bind_rows()

# Chromosomes in numeric order; treatments in the order listed above
all_data$Treatment <- factor(all_data$Treatment, levels = df_labels)
all_data$CHROM <- factor(all_data$CHROM, levels = as.character(1:10))

# Shared y-axis limits taken from the full PI range
global_ymax <- max(all_data$PI, na.rm = TRUE)
global_ymin <- min(all_data$PI, na.rm = TRUE)

# For each chromosome: PI vs BIN_START with one panel per treatment,
# printed and saved as PI_chr<chr>_across_treatments.png
for (chr in 1:10) {
  chr_data <- all_data %>%
    filter(CHROM == as.character(chr))

  p <- ggplot(data = chr_data, mapping = aes(x = BIN_START, y = PI, color = Treatment)) +
    geom_point(size = 0.5, alpha = 0.6) +
    facet_wrap(~Treatment, nrow = 1) +
    scale_color_manual(values = col_pal) +
    scale_x_continuous(labels = label_number(scale = 1e-6, suffix = "M")) +
    ylim(global_ymin, global_ymax) +
    theme_classic() +
    ggtitle(paste("Chromosome", chr, "- PI across Treatment Types")) +
    xlab("BIN_START (millions)") +
    ylab("PI")

  print(p)
  ggsave(
    paste0("PI_chr", chr, "_across_treatments.png"),
    p,
    width = 16, height = 4, dpi = 300
  )
}
# Keep only the chromosome 1 rows of mydf (rows with a missing CHROM are
# dropped), then plot PI against the window start position.
mydf.chr1 <- mydf[mydf$CHROM %in% "1", ]
ggplot(data = mydf.chr1, mapping = aes(x = BIN_START, y = PI)) +
  geom_point() +
  theme_classic()
# Treatment tables and the label used for each in titles and file names
df_names <- c("pi.all.df", "pi.concon.df", "pi.conrod.df", "pi.strcon.df", "pi.strrod.df")
df_labels <- c("ALL", "CONCON", "CONROD", "STRCON", "STRROD")

# Step 1: PI range over every table, used as a common y-axis
all_pi_values <- unlist(lapply(df_names, function(table_name) get(table_name)[["PI"]]))
global_ymax <- max(all_pi_values, na.rm = TRUE)
global_ymin <- min(all_pi_values, na.rm = TRUE)

# Step 2: one PNG per treatment x chromosome (PI_<label>_chr<i>.png);
# the plots are written to disk only, not printed
for (j in seq_along(df_names)) {
  df <- get(df_names[j])
  label <- df_labels[j]

  for (i in 1:10) {
    chrom_id <- as.character(i)
    chr_data <- df[df$CHROM == chrom_id, ]

    p <- ggplot(data = chr_data, mapping = aes(x = BIN_START, y = PI)) +
      geom_point() +
      theme_classic() +
      labs(
        title = paste("Treatment:", label, "- Chromosome", i),
        x = "BIN_START",
        y = "PI"
      ) +
      ylim(global_ymin, global_ymax)

    ggsave(
      filename = paste0("PI_", label, "_chr", i, ".png"),
      plot = p,
      width = 8, height = 5, dpi = 300
    )
  }
}
Runs of homozygosity (ROH) are contiguous lengths of homozygous genotypes that are present in an individual due to parents transmitting identical haplotypes to their offspring.
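Individual autozygosity can be estimated with Froh: the proportion of the autosomal genome that lies within runs of homozygosity above a specified minimum length. Comparing Froh between individuals or subpopulations indicates how much of each genome is likely identical by descent.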
This technique can be used to identify the genomic footprint of inbreeding in conservation programs, as organisms that have undergone recent inbreeding will exhibit long runs of homozygosity. The effect of inbreeding in the resulting sub-populations could be studied by measuring the runs of homozygosity in different individuals.
vcftools --vcf SNP.TRSdp10g1.FIL.vcf --LROH --out ROD.CADO.all.LROH